********** Generates general wave1 and wave2 survey data summary statistics

* Session setup: disable -more- pauses, change to the working directory and
* load the merged survey data, replacing whatever is currently in memory.
* NOTE(review): the directory path is empty here - presumably to be filled in by the user.
set more off
cd ""
use data_survey_merged, clear

* Excludes observations identified as low quality by the survey provider.
* Wave-1 outliers are dropped entirely; for wave-2 outliers only the w2_*
* answers are set to missing, so that a valid wave-1 observation is preserved
* for an individual whose wave-2 response is not usable.
drop if dynata_outliers == 1
foreach var of varlist w2_* {
	* Branch on the storage type explicitly instead of wrapping a numeric and a
	* string -replace- in -capture-: -capture- would also hide genuine errors
	* (e.g. a missing dynata_outliers_w2) and silently keep the flagged answers.
	capture confirm numeric variable `var'
	if _rc == 0 {
		replace `var' = . if dynata_outliers_w2 == 1
	}
	else {
		* "" is Stata's missing value for strings; a literal "." would be
		* treated as a non-missing answer.
		replace `var' = "" if dynata_outliers_w2 == 1
	}
}
	
	
* Blanks non-sensical GPA percentages: exactly 0 or above 1.
* (Missing values also satisfy ">1" in Stata, so they simply stay missing.)
replace GPA_pct = . if GPA_pct > 1 | GPA_pct == 0

* Output columns filled by the summary loops below: a row label plus
* N / mean / SD for wave 1 (r_*) and for the wave-2 sample (r_w2_*).
generate measure = ""

foreach stat in obs av sd w2_obs w2_av w2_sd {
	capture drop r_`stat'
	capture generate r_`stat' = .
}

* Survey completion dates: the first 10 characters of each end-date string are
* parsed as month-day-year; t_recontact is the number of days between the
* wave-1 and wave-2 completion dates.
foreach stem in end w2_end {
	generate `stem' = substr(`stem'_date, 1, 10)
	generate `stem'_d = date(`stem', "MDY")
}
generate t_recontact = w2_end_d - end_d
	
* recodes categorical variables into 0-1 indicators
* NOTE(review): a missing source value yields 0 (not missing) in these
* indicators - confirm that this is intended for the summary tables.

* country (1=AU, 2=CA, 3=UK, 4=US)
local code = 0
foreach dummy in AU CA UK US {
	local ++code
	generate `dummy' = (cntry == `code')
}

* (1=high school, 2=college, 3=employed, 4=unemployed, 5=other)
local code = 0
foreach dummy in p_high p_col p_emp p_unemp p_other {
	local ++code
	generate `dummy' = (profession == `code')
}

* (1: <high school, 2=high school, 3=bachelors, 4=masters, 5=other)
local code = 0
foreach dummy in h_nohigh h_high h_bc h_ma h_other {
	local ++code
	generate `dummy' = (highest_edu == `code')
}

* flags respondents who chose the value 5 on sure_pers
generate sure_high = (sure_pers == 5)

	
* indicators for answering both waves
* both_waves: wave2 flag equals 1
generate both_waves = (wave2 == 1)
* both_waves_ok: first wave-2 BFI-2 item is not missing
generate both_waves_ok = (w2_BFI2_1 != .)

tabulate both_waves
tabulate both_waves_ok
	
* Variable lists used by the recode and summary loops in this file
global dems "male age AU CA UK US  p_high p_col p_emp p_unemp p_other h_nohigh h_high h_bc h_ma h_other GPA_pct LS_gen q_mood_beginning_1 q_mood_end_1 sure_pers reliable pc t_recontact length_m"
global traits "extrav consci neurot agree open cognitive"
global facets "sociability assertiveness energy compassion respectfulness trust organization productiveness responsibility anxiety depression em_volatily curiosity aesthetic_sense imagination"
global prefs "GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other"
global wellb "LS_gen SWLS q_mood_beginning_1 q_mood_end_1"


* recode relevant variables to be 0-10 for reporting (instead of 1-11):
* the preference ($prefs) and well-being ($wellb) items plus reliable and
* cog_*, each in its wave-1 and its w2_ version
foreach item of varlist $prefs $wellb reliable cog_* {
	foreach v in `item' w2_`item' {
		replace `v' = `v' - 1
	}
}

* Fills one row of the result columns per variable in $dems: the variable
* name, then N, mean and SD for the full sample and for the wave-2 sample.
local row = 1
foreach var of varlist $dems {
	replace measure = "`var'" in `row'

	* full-sample statistics
	summarize `var', detail
	replace r_obs = `r(N)' in `row'
	replace r_av = `r(mean)' in `row'
	replace r_sd = `r(sd)' in `row'

	* w2 stats: the first 17 entries and entry 24 (t_recontact in the current
	* $dems ordering) have no w2_ counterpart here, so their wave-1 values are
	* summarised over the wave-2 respondents; all other entries use w2_<name>.
	local pre w2_
	if `row' <= 17 | `row' == 24 {
		local pre
	}

	summarize `pre'`var' if w2_BFI2_1 != ., detail
	replace r_w2_obs = `r(N)' in `row'
	replace r_w2_av = `r(mean)' in `row'
	replace r_w2_sd = `r(sd)' in `row'

	local ++row
}
	

*** determine statistically significant predictors of being in both waves rather than w1 only
* For every variable in $dems, regresses the both-waves indicator on that
* single variable and stores the slope and its p-value in the matching row.
capture drop names
generate names = ""
capture drop pval
generate pval = .
capture drop coeff
generate coeff = .

local i = 1
foreach var of varlist $dems {

	replace names = "`var'" in `i'

	* The model is estimated exactly once per variable; a repeated identical
	* regression would only double the run time and the log output.
	regress both_waves_ok `var'

	* r(table): row 1 = coefficient, row 4 = p-value; column 1 = `var'
	replace pval = r(table)[4,1] in `i'
	replace coeff = r(table)[1,1] in `i'

	local ++i
}

*** Below yields the numbers for Table A.1
br measure r_* pval coeff




************* Repeats above for measured constructs (Table A.3)
* Fills one row of the result columns per construct: the variable name, then
* N, mean and SD for the full wave-1 sample and for the wave-2 answers.

local i = 1
foreach var of varlist extrav sociability assertiveness energy ///
		consci organization productiveness responsibility ///
		neurot anxiety depression em_volatily ///
		agree compassion respectfulness trust ///
		open curiosity aesthetic_sense imagination ///
		SOEP_e SOEP_c SOEP_n SOEP_a SOEP_o ///
		GPS_risk GPS_time GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec ///
		GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other ///
		LS_gen SWLS q_mood_beginning_1 q_mood_end_1 cog_* {

	replace measure = "`var'" in `i'
	sum `var', d
	replace r_obs = `r(N)' in `i'
	replace r_av = `r(mean)' in `i'
	replace r_sd = `r(sd)' in `i'

	* w2 stats
	* The wave-2 columns use the w2_ counterpart of the construct whenever it
	* exists. A position-based rule (`i'>17 & `i'!=23) only fits the ordering of
	* $dems; applied to this list it made positions 1-17 and 23 (SOEP_n) report
	* wave-1 answers in the wave-2 columns while the remaining rows used w2_*.
	* NOTE(review): confirm that Table A.3 is meant to report wave-2 answers
	* for every construct.
	local w2var `var'
	capture confirm variable w2_`var', exact
	if _rc == 0 {
		local w2var w2_`var'
	}
	else {
		display as text "note: w2_`var' not found; wave-1 `var' summarised over wave-2 respondents instead"
	}

	sum `w2var' if w2_BFI2_1 != ., d
	replace r_w2_obs = `r(N)' in `i'
	replace r_w2_av = `r(mean)' in `i'
	replace r_w2_sd = `r(sd)' in `i'

	local ++i
}
	

*** determine statistically significant predictors of being in both waves rather than w1 only
* Same procedure as for the demographics, now for the measured constructs:
* one bivariate regression of both_waves_ok per construct, keeping the slope
* and its p-value in the row that belongs to the construct.
foreach col in names pval coeff {
	capture drop `col'
}
capture generate names = ""
capture generate pval = .
capture generate coeff = .

tempname est
local row = 1
foreach var of varlist extrav sociability assertiveness energy ///
		consci organization productiveness responsibility ///
		neurot anxiety depression em_volatily ///
		agree compassion respectfulness trust ///
		open curiosity aesthetic_sense imagination ///
		SOEP_e SOEP_c SOEP_n SOEP_a SOEP_o ///
		GPS_risk GPS_time GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec ///
		GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other ///
		LS_gen SWLS q_mood_beginning_1 q_mood_end_1 cog_* {

	replace names = "`var'" in `row'
	regress both_waves_ok `var'

	* copy of r(table): row 1 = coefficient, row 4 = p-value; column 1 = `var'
	matrix `est' = r(table)
	replace pval = `est'[4,1] in `row'
	replace coeff = `est'[1,1] in `row'

	local ++row
}

*** Below yields the numbers for Table A.3
br measure r_* pval coeff



********** Below code replicates Fig A.2 and A.3 of the paper
* w1w2_* variables are the averages of the wave-1 and wave-2 answers
foreach item of varlist reliable sure_personality {
	generate w1w2_`item' = (`item' + w2_`item') / 2
	* the average is missing when an answer is missing; to not lose those
	* observations the wave-1 value is used instead (no new information, but
	* the observation is kept)
	replace w1w2_`item' = `item' if missing(w1w2_`item')
}

* only the plotted variables and the sample flag are needed from here on
keep both_waves_ok w1w2_reliable w1w2_sure_pers

* distribution of average self-reported reliability for individuals who did
* both survey waves (whose responses can be used to calculate test-retest
* correlations)
histogram w1w2_reliable if both_waves_ok == 1, discrete start(4) width(0.5) frequency ///
	xlabel(4(.5)10) ///
	xtitle("Average Self-Reported Reliability Across 2 Survey Waves (0-10 scale)")
graph export w1w2_rel.pdf, replace


* distribution of the average sure_personality answer for the same individuals
* NOTE(review): the axis title below still says "Reliability" although the
* plotted variable is w1w2_sure_pers - looks copied from the graph above; confirm.
histogram w1w2_sure_pers if both_waves_ok == 1, discrete start(1) width(0.5) frequency ///
	xlabel(1(0.5)5) ///
	xtitle("Average Self-Reported Reliability Across 2 Survey Waves (1-5 scale)")
graph export w1w2_sure.pdf, replace

